#!/usr/bin/env python3
# -*- coding:utf-8 -*-
import numpy as np
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error,\
        mean_squared_error, r2_score
# Full simple-linear-regression pipeline
'''preparing data'''
# Load the dataset; there is no header row, so columns are positional.
data = pd.read_csv('slr_data.csv', header=None)
print(data.describe())

X = data.iloc[:, :-1]   # every column but the last -> feature matrix
y = data.iloc[:, -1]    # last column -> target
# Hold out 20% of the rows for testing; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=53)

'''training the model'''
# Fit ordinary least squares on the training split.
clf = LinearRegression()
clf.fit(X_train, y_train)
print("slope m=", clf.coef_[0])
print("intercept b=", clf.intercept_)

'''making predictions'''
y_pred = clf.predict(X_test)
# Side-by-side table of inputs, ground truth, and model predictions.
df2 = pd.DataFrame(
    {'X_test': X_test.iloc[:, 0],
     'y_test': y_test,
     'y_pred': y_pred},
).reset_index(drop=True)
print(df2)

'''evaluating the model'''
# model evaluation
# Compute MSE once; RMSE is its square root.
mse = mean_squared_error(y_test, y_pred)
print('Mean Absolute Error:', mean_absolute_error(y_test, y_pred))
print('Mean Squared Error:', mse)
print('Root Mean Squared Error:', np.sqrt(mse))
print('R2 Score:', r2_score(y_test, y_pred))

'''visualization'''
# Scatter of the raw data points (x = column 0, y = column 1).
plt.plot(data.iloc[:, 0], data.iloc[:, 1], 'ro')
# BUG FIX: the regression line must span the x-column's range; the
# original took xmax from column 1 (the target column), so the line
# could stop short of — or overshoot — the actual data.
# Also dropped the no-op `plt.xlim()` / `plt.ylim()` calls: with no
# arguments they only *return* the current limits, setting nothing.
xmin, xmax = data.iloc[:, 0].min(), data.iloc[:, 0].max()
xx = np.linspace(xmin, xmax, 101)
# coef_ is a 1-element array for a single feature; index it so the
# slope is a scalar rather than relying on broadcasting.
yy = clf.coef_[0] * xx + clf.intercept_
plt.plot(xx, yy, linestyle='-', color='green')
plt.legend(['data points', 'regression'])
# Annotate the plot with the fitted equation.
plt.text(0, 4, f'y={clf.coef_[0]:.4f}x+{clf.intercept_:.4f}')
plt.show()
